In [11]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [9]:
xlow = 0
xhigh = np.pi/2
x = np.linspace(xlow, xhigh, num = 100)
y = np.cos(x)
In [10]:
plt.figure()
plt.plot(x, y)
plt.show()
In [18]:
dim = 100
x_mcmc = np.random.random(dim) * (xhigh - xlow) + xlow
y_mcmc = np.cos(x_mcmc)
In [19]:
plt.figure()
plt.plot(x_mcmc, y_mcmc, 'o')
plt.show()
In [21]:
y_mcmc.mean() * (xhigh - xlow)
Out[21]:
In [ ]:
# completing the truncated line: presumably the one-sigma Monte Carlo error estimate
(xhigh - xlow) * np.sqrt(y_mcmc.var() / dim)
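The estimator itself is (xhigh - xlow) times the sample mean of f, and its statistical error shrinks as 1/sqrt(N). A quick check against the exact value of the integral (a sketch, using the `dim` samples from above):
In [ ]:
exact = np.sin(xhigh) - np.sin(xlow)          # integral of cos(x) over [0, pi/2] is exactly 1
print((xhigh - xlow) * y_mcmc.mean() - exact)  # should be within roughly one sigma of the error above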
In [1]:
from numpy.random import random
The following code is adapted from http://code.activestate.com/recipes/414200-metropolis-hastings-sampler/:
In [23]:
def sdnorm(z):
    """
    Standard normal pdf (Probability Density Function)
    """
    return np.exp(-z*z/2.)/np.sqrt(2*np.pi)

n = 10000
alpha = 1
x = 0.
vec = []
vec.append(x)
innov = random(n) * 2 * alpha - alpha  # random innovation, uniform proposal distribution
for i in range(1, n):
    can = x + innov[i]                            # candidate
    aprob = np.min([1., sdnorm(can)/sdnorm(x)])   # acceptance probability
    u = random(1)
    if u[0] < aprob:
        x = can
        vec.append(x)
In [24]:
len(vec)
Out[24]:
In [25]:
#plotting the results:
#theoretical curve
x = np.arange(-3,3,.1)
y = sdnorm(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)
plt.hist(vec, bins=30, density=True)  # 'normed' is gone from modern matplotlib; use density
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.legend(('PDF','Samples'))
plt.show()
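A quick sanity check (a sketch): for a standard normal target, the chain's mean and standard deviation should approach 0 and 1.
In [ ]:
samples = np.array(vec)
print(samples.mean(), samples.std())  # should be close to 0 and 1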
My version of the code:
In [41]:
def metropolis_hastings(f, init=0, nsamples=1000):
    alpha = 1
    sample_count = 0
    x = init
    vec = []
    vec.append(x)
    while sample_count < nsamples - 1:
        can = x + random(1)[0] * 2 * alpha - alpha  # candidate
        aprob = np.min([1., f(can)/f(x)])           # acceptance probability (the original hard-coded sdnorm and ignored f)
        u = random(1)
        if u[0] < aprob:
            x = can
            vec.append(x)
            sample_count += 1
    return np.array(vec)
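One caveat (shared with the original recipe): a textbook Metropolis-Hastings chain records the current state on every iteration, repeating it when a proposal is rejected; keeping only accepted moves distorts the stationary distribution. A corrected sketch:
In [ ]:
def metropolis_hastings_chain(f, init=0., nsamples=1000, alpha=1.):
    """Metropolis-Hastings with a uniform proposal on [x - alpha, x + alpha]."""
    x = init
    chain = np.empty(nsamples)
    for i in range(nsamples):
        can = x + random(1)[0] * 2 * alpha - alpha  # candidate
        if random(1)[0] < np.min([1., f(can) / f(x)]):
            x = can                                 # accept the move
        chain[i] = x                                # record x even after a rejection
    return chain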
In [42]:
vec = metropolis_hastings(sdnorm, nsamples=10000)
In [43]:
len(vec)
Out[43]:
In [44]:
#plotting the results:
#theoretical curve
x = np.arange(-3,3,.1)
y = sdnorm(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)
plt.hist(vec, bins=30, density=True)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.legend(('PDF','Samples'))
plt.show()
In [45]:
def gauss(z):
    """
    Unnormalized standard normal density; integrates to sqrt(2*pi)
    """
    return np.exp(-z*z/2.)
In [48]:
import scipy.integrate as integrate
In [50]:
integrate.quad(gauss, -np.inf, np.inf)
Out[50]:
In [51]:
np.sqrt(2*np.pi)
Out[51]:
In [167]:
def triangle(z):
    """Triangular bump: rises from 0 at x=0 to 10 at x=10, then falls back to 0 at x=20."""
    x = np.atleast_1d(np.asarray(z, dtype=float))
    return np.piecewise(x, [x < 0, (x > 0) & (x < 10), (x >= 10) & (x <= 20), x > 20],
                        [0, lambda x: x, lambda x: 20 - x, 0])
In [168]:
triangle([1])
Out[168]:
In [169]:
x = np.arange(-100,100,.1)
plt.figure()
plt.plot(x, triangle(x))
plt.show()
In [170]:
type(np.array([1]))
Out[170]:
In [174]:
integrate.quad(triangle, -np.inf, np.inf)
Out[174]:
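quad over (-inf, inf) can miss a function whose support is only [0, 20], since the adaptive sampler probes few points. Integrating over the support directly gives the area, 2 * (10 * 10 / 2) = 100, so triangle/100 integrates to 1:
In [ ]:
integrate.quad(triangle, 0, 20)  # area of the two half-triangles: 100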
In [178]:
# the original lambda ignored its argument and referenced the global x; pass the normalized target instead,
# and start inside the support: triangle(0) = 0 would give a 0/0 acceptance ratio
vec = metropolis_hastings(lambda z: triangle(z)[0] / 100., init=10, nsamples=10000)
In [181]:
x = np.arange(0, 100, .1)
y = triangle(x) / 100.  # normalized pdf
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)
plt.hist(vec, bins=30, density=True)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.xlim((-30,30))
plt.legend(('PDF','Samples'))
plt.show()
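A quick check that the chain stays in the support [0, 20] (a sketch):
In [ ]:
print(((vec >= 0) & (vec <= 20)).mean())  # fraction of samples inside the support; should be ~1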
In [184]:
import pymc
In [185]:
@pymc.stochastic(dtype=int)
def switchpoint(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""
    if value > t_h or value < t_l:
        # Invalid values
        return -np.inf
    else:
        # Uniform log-likelihood
        return -np.log(t_h - t_l + 1)
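In PyMC2 the function body of a @stochastic variable is its log-probability; a minimal check (a sketch):
In [ ]:
print(switchpoint.value)  # 1900
print(switchpoint.logp)   # -log(112): uniform over the 112 years 1851..1962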
In [189]:
from scipy import stats
# gridint, probs and gridlimits were undefined here; they come from the SciPy
# rv_discrete tutorial (a truncated normal discretised onto an integer grid):
npoints = 20
npointsh = npoints // 2
nbound = 4                                    # truncation bound in units of sigma
grid = np.arange(-npointsh, npointsh + 2, 1)  # integer grid
gridlimits = grid - 0.5                       # bin limits
grid = grid[:-1]
probs = np.diff(stats.truncnorm.cdf(gridlimits / npointsh * nbound, -nbound, nbound))
gridint = grid
normdiscrete = stats.rv_discrete(values=(gridint, np.round(probs, decimals=7)), name='normdiscrete')
n_sample = 500
np.random.seed(87655678)  # fix the seed for replicability
rvs = normdiscrete.rvs(size=n_sample)
rvsnd = rvs
f, l = np.histogram(rvs, bins=gridlimits)
sfreq = np.vstack([gridint, f, probs * n_sample]).T
print(sfreq)
In [219]:
from scipy import stats
class deterministic_gen(stats.rv_continuous):
    # note: 1/x is not a normalizable pdf, so the numerically derived
    # cdf/rvs below are not meaningful; this just exercises the machinery
    def _pdf(self, x):
        return 1/x
In [220]:
deterministic = deterministic_gen(name="deterministic")
In [222]:
deterministic.cdf(np.arange(1, 3, 0.5))
In [223]:
deterministic.pdf(np.arange(-3, 3, 0.5))
Out[223]:
In [224]:
deterministic.rvs(size=10)
In [195]:
deterministic?
In [239]:
def getDistribution(data):
    kernel = stats.gaussian_kde(data)
    class rv(stats.rv_continuous):
        def _rvs(self, *x, **y):
            # don't ask me why it's using self._size
            # nor why I have to cast to int
            return kernel.resample(int(self._size))
        def _cdf(self, x):
            return kernel.integrate_box_1d(-np.inf, x)
        def _pdf(self, x):
            return kernel.evaluate(x)
    return rv(name='kdedist')
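Note: self._size is a private attribute of older SciPy; newer releases pass size and random_state into _rvs instead. A sketch of the adapted inner class (assuming a recent SciPy; kernel is the KDE from the enclosing factory):
In [ ]:
class rv_new(stats.rv_continuous):
    def _rvs(self, size=None, random_state=None):
        # gaussian_kde.resample returns shape (d, n); take the first row for 1-d data
        return kernel.resample(int(np.prod(size)))[0]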
In [240]:
f = getDistribution(np.random.randn(100))
In [243]:
f.rvs(size=1)  # a bare positional argument would be taken as loc, not size
In [231]:
def getDistribution(data):
    kernel = stats.gaussian_kde(data)
    class rv(stats.rv_continuous):
        def _cdf(self, x):
            return kernel.integrate_box_1d(-np.inf, x)
        def _pdf(self, x):
            return kernel.evaluate(x)
    return rv(name='kdedist')
Out[231]:
In [244]:
kernel = stats.gaussian_kde(np.random.randn(100))
In [245]:
kernel
Out[245]:
In [246]:
type(kernel)
Out[246]:
In [247]:
kernel.resample(10)
Out[247]:
In [282]:
def getDistribution():
    sigma = 1
    mu = 0
    f = lambda x: 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))
    class rv(stats.rv_continuous):
        def _pdf(self, x):
            return f(x)
        def _cdf(self, x):
            return integrate.quad(f, -np.inf, x)[0]  # quad returns (value, error); keep the value
    return rv(name='gaussdist')
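rv_continuous draws variates by numerically inverting the cdf, so every rvs call triggers many _cdf evaluations; a closed-form cdf avoids running quad each time. A faster sketch using scipy.special.ndtr, the standard normal cdf:
In [ ]:
from scipy.special import ndtr
class gauss_gen(stats.rv_continuous):
    def _pdf(self, x):
        return np.exp(-x**2 / 2.) / np.sqrt(2 * np.pi)
    def _cdf(self, x):
        return ndtr(x)  # closed form, no quadrature
gauss_rv = gauss_gen(name='gauss_rv')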
In [283]:
a = getDistribution()
In [288]:
%%timeit
a.rvs(size=100)
In [285]:
%pylab inline
In [286]:
plt.figure()
plt.hist(a.rvs(size=1000))
plt.show()
In [297]:
integrate.quad(lambda x: x ** 5, 0, 30)
Out[297]:
In [293]:
a = lambda x: x ** 2
In [294]:
a(np.inf)
Out[294]:
In [162]:
from pymc import stochastic, observed, deterministic, uniform_like, runiform, rnormal, Sampler, Normal, Uniform
from numpy import inf, log, cos, array
import pymc
import numpy.random

true_slope = 1.5
true_intercept = 4
N = 30
true_x = runiform(0, 50, N)
true_y = true_slope*true_x + true_intercept
data_y = rnormal(true_y, 2)  # rnormal takes (mu, tau); tau is the precision 1/sigma**2
data_x = rnormal(true_x, 2)
In [163]:
plt.plot(true_x, true_y, '-')
plt.plot(data_x, data_y, 'o')
Out[163]:
In [164]:
A = array([data_x, np.ones(len(data_x))])
lstsq_theta = numpy.linalg.lstsq(A.T, data_y)[0]
print(lstsq_theta)
In [165]:
@stochastic
def theta(value=array([2., 5.])):
    """Slope and intercept parameters for a straight line.
    The likelihood corresponds to the prior probability of the parameters."""
    slope, intercept = value
    prob_intercept = uniform_like(intercept, -10, 10)
    prob_slope = log(1./cos(slope)**2)
    return prob_intercept + prob_slope

init_x = data_x.clip(min=0, max=50)

# Inferred true inputs.
x = Uniform('x', lower=0, upper=50, value=init_x)

@deterministic(plot=False)
def modelled_y(x=x, theta=theta):
    """Return y computed from the straight line model, given the
    inferred true inputs and the model parameters."""
    slope, intercept = theta
    return slope*x + intercept

"""
Input error model.
Define the probability of measuring x knowing the true value.
"""
measured_input = Normal('measured_input', mu=x, tau=2, value=data_x, observed=True)

"""
Output error model.
Define the probability of measuring y knowing the true value.
In this case, the true value is assumed to be given by the model, but
structural errors could be integrated to the analysis as well.
"""
y = Normal('y', mu=modelled_y, tau=2, value=data_y, observed=True)
In [166]:
model = pymc.Model([modelled_y, y, x, theta])
mcmc = pymc.MCMC(model)
mcmc.sample(iter=10000, burn=5000, thin=2)
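With iter=10000, burn=5000 and thin=2, the trace keeps (10000 - 5000) / 2 = 2500 samples:
In [ ]:
len(mcmc.trace('theta')[:])  # 2500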
In [175]:
slope_samples = mcmc.trace('theta')[:,0]
intercept_samples = mcmc.trace('theta')[:,1]
In [176]:
mcmc.trace('theta').stats()
Out[176]:
In [177]:
ax = plt.subplot(211)
plt.hist(slope_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label="posterior of slope", color="#A60628", density=True)
plt.legend(loc="upper left")
plt.xlabel("slope value")
plt.axvline(true_slope)
ax = plt.subplot(212)
plt.hist(intercept_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label="posterior of intercept", color="#7A68A6", density=True)
plt.legend(loc="upper left")
plt.xlabel("intercept value")
plt.axvline(true_intercept)
plt.show()
In [178]:
slope_samples.mean()
Out[178]:
In [179]:
slope_samples.std()
Out[179]:
In [180]:
intercept_samples.mean()
Out[180]:
In [181]:
intercept_samples.std()
Out[181]:
In [182]:
pymc.Matplot.plot(mcmc)
In [151]:
pymc_theta = mcmc.trace('theta').stats()['mean']  # the object before .stats() was missing; presumably the theta trace, as above
In [183]:
numpy.random.seed(15)
true_mu = 1.5
true_tau = 50.0
N_samples = 500
mu = pymc.Uniform('mu', lower=-100.0, upper=100.0)
tau = pymc.Gamma('tau', alpha=0.1, beta=0.001)
data = pymc.rnormal(true_mu, true_tau, size=(N_samples,))
y = pymc.Normal('y', mu, tau, value=data, observed=True)
In [189]:
mcmc=pymc.MCMC([mu, tau, data, y])
mcmc.sample(iter=1000, burn=500, thin=2)
In [190]:
print('mu', np.mean(mcmc.trace('mu')[:]))
print('tau', np.mean(mcmc.trace('tau')[:]))
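For comparison (a sketch): with 500 observations the posterior means should sit close to the sample statistics, mu near data.mean() and tau near 1/data.var():
In [ ]:
print(data.mean(), 1 / data.var())  # compare with the posterior means printed above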
In [188]:
pymc.Matplot.plot(mcmc)